#### Simulated data as in reference paper
#### method = "complete"
### Simulate 5 clusters (different behaviours) in 1 out of the 100 simulated datasets of the paper

set.seed(10)
sg <- c(500, 250, 700, 300, 100)

t <- matrix(0, nrow = 5, ncol = 5)
t[1, ] <- rep(6, 5) # positive value
t[2, ] <- c( 0,  5, 12, 13, 15) # increasing
t[3, ] <- c(15, 11,  9,  5,  0) # decreasing
t[4, ] <- c( 6, 12, 15, 10,  5) # oscillating
t[5, ] <- c(12, 17,  3,  7, 10) # convex

t_mat <- NULL
for (i in seq_len(nrow(t))) {
  t_mat <- rbind(
    t_mat,
    matrix(rep(t[i, ], sg[i]), nrow = sg[i], byrow = TRUE)
  )
}

### added random noise from Normal(mu=0,sd=1.5), max variability considered
data_15 <- matrix(NA, nrow = 2000, ncol = 5)
data_15[1:1850, ] <- matrix(
  abs(rnorm(sum(sg) * 5, sd = 1.5)),
  nrow = sum(sg),
  ncol = 5
) + t_mat

set.seed(100) # simulate outliers
data_15[1851:2000, ] <- matrix(
  runif(n = 150 * 5, min = 0, max = max(data_15, na.rm = TRUE)),
  nrow = 150,
  ncol = 5
)

### PCA
pca <- prcomp(data_15)

cols=c("red", "green", "blue", "brown4", "chocolate1", "gray")
mycols=rep(cols, c(500, 250, 700, 300, 100, 150))

mytheme <- list()
mytheme$axis.line$col="transparent"
mytheme$clip$panel="off"
trellis.par.set(mytheme)

print(cloud(pca$x[,3]~pca$x[,1]+pca$x[,2], col=mycols, pch=19, 
      xlab="PC1", 
      ylab="PC2", 
      zlab="PC3"), par.settings = mytheme) # categories are well defined even with max sd

### Run CrossClustering
cc <- cc_crossclustering(
  dist(data_15),
  k_w_min = 2,
  k_w_max = 19,
  k2_max = 20,
  out = TRUE
)

clusterCC <- cc_get_cluster(cc)

print(cloud(pca$x[,3]~pca$x[,1]+pca$x[,2], col=clusterCC, pch=19, 
            xlab="PC1", 
            ylab="PC2", 
            zlab="PC3"), par.settings = mytheme)

# compute ARI
library(mclust)
ground_truth <- c(rep(1:6, c(500, 250, 700, 300, 100, 150)))

adjustedRandIndex(clusterCC, ground_truth) # ARI=0.9794
      
      